import numpy as np
from math import *
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import sys, random, time
import matplotlib.ticker as mtick
from env import single_expert_dynamics,single_expert_stochastic_dynamics, expert1_reward, expert2_reward, expert3_reward, expert1_cost, expert2_cost, expert3_cost, feature1, feature2, feature3, expert_1_basis_constraint, expert_2_basis_constraint, expert_3_basis_constraint
from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes
from mpl_toolkits.axes_grid1.inset_locator import mark_inset
from mpl_toolkits.axes_grid1.inset_locator import inset_axes



# Number of points on the shared x-axis of every figure drawn by this script.
iterations = 51
# Number of nominal trajectories read from the trajectory file (30 rows each).
num_trials = 100
# Not referenced in the visible part of this script.  Note that this
# assignment rebinds the `gamma` name brought in by `from math import *`.
gamma = 0.9
# Size of the last axis of each reshaped policy table.
num_action = 9

def reward_cost_list(trajectories, num_data, horizon=30):
  """Accumulate the three experts' rewards and costs along each trajectory.

  ``trajectories`` is read as ``num_data`` consecutive groups of ``horizon``
  rows.  Within a row, columns 0-1, 2-3 and 4-5 are handed to the ``env``
  reward/cost functions as the states of experts 1, 2 and 3, and columns
  6, 7 and 8 as their respective actions.

  Args:
    trajectories: 2-D array expected to hold ``horizon * num_data`` rows of
      9 columns each.
    num_data: number of trajectories to evaluate.
    horizon: number of rows that make up one trajectory.  Defaults to 30,
      the value that used to be hard-coded.

  Returns:
    A pair ``(reward_list, cost_list)``.  Each list has ``num_data`` entries;
    entry ``i`` is the running sum, over the steps of trajectory ``i``, of
    the three experts' rewards (respectively costs).  Both lists are empty
    when ``num_data`` is 0.
  """
  # np.asmatrix replaces np.mat: the np.mat alias was removed in NumPy 2.0,
  # while np.asmatrix builds the same np.matrix objects on old and new
  # releases.
  # Fixed weight vectors (as column matrices) passed to the env functions.
  omega1 = np.asmatrix([1.0, -1.0]).T
  omega2 = np.asmatrix([1.0, -1.0]).T
  omega3 = np.asmatrix([1.0, -1.0]).T
  theta1 = np.asmatrix([1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0]).T
  theta2 = np.asmatrix([1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0]).T
  theta3 = np.asmatrix([1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0]).T
  reward_list = []
  cost_list = []
  for i in range(num_data):
    reward = 0.0
    cost = 0.0
    single_trajectory = trajectories[horizon * i:horizon * (i + 1), :]
    for j in range(horizon):
      step = single_trajectory[j]
      # Copies keep the matrices handed to env independent of `trajectories`.
      state1 = np.asmatrix(np.copy(step[0:2])).T
      state2 = np.asmatrix(np.copy(step[2:4])).T
      state3 = np.asmatrix(np.copy(step[4:6])).T
      action1 = np.asmatrix(np.copy(step[6])).T
      action2 = np.asmatrix(np.copy(step[7])).T
      action3 = np.asmatrix(np.copy(step[8])).T
      single_reward = (expert1_reward(omega1, state1, action1)
                       + expert2_reward(omega2, state2, action2)
                       + expert3_reward(omega3, state3, action3))
      single_cost = (expert1_cost(theta1, state1, action1)
                     + expert2_cost(theta2, state2, action2)
                     + expert3_cost(theta3, state3, action3))
      reward = reward + single_reward
      cost = cost + single_cost
    reward_list.append(reward)
    cost_list.append(cost)
  return reward_list, cost_list


# Three nominal optimal policy files.  Each one is loaded as a float array
# and reshaped into a 9 x 9 x num_action table.
# NOTE(review): the policy tables are not used again in the visible part of
# this script.
distribution1 = np.loadtxt("nominal_optimal_policy1_file.txt", dtype=float)
policy1 = np.reshape(distribution1, (9, 9, num_action))
distribution2 = np.loadtxt("nominal_optimal_policy2_file.txt", dtype=float)
policy2 = np.reshape(distribution2, (9, 9, num_action))
distribution3 = np.loadtxt("nominal_optimal_policy3_file.txt", dtype=float)
policy3 = np.reshape(distribution3, (9, 9, num_action))

# Nominal trajectories: reshaped to 30 * num_trials rows of 9 columns, then
# scored per trajectory by reward_cost_list.
nominal_trajectories = np.loadtxt("nominal_optimal_trajectory_file.txt", dtype=float)
nominal_trajectories = np.reshape(nominal_trajectories, (30 * num_trials, 9))
nominal_reward_list, nominal_cost_list = reward_cost_list(nominal_trajectories, num_trials)
# Constant level used for the flat nominal reference series in the
# constraint-violation figure.
nominal_constraint_violation = 1


# Divisor applied to every cost file before it is plotted.
# NOTE(review): 3000 equals 30 * num_trials, the row count of the nominal
# trajectory array -- presumably a per-sample normalisation; confirm.
cost_scale = 3000.0

# Mean cost per outer iteration for each loaded series.  np.loadtxt already
# returns an ndarray, so no extra np.array() wrap is needed.
cost1_mean = np.loadtxt("cost1_mean_file.txt", dtype=float) / cost_scale
cost2_mean = np.loadtxt("cost2_mean_file.txt", dtype=float) / cost_scale
cost3_mean = np.loadtxt("cost3_mean_file.txt", dtype=float) / cost_scale
cost4_mean = np.loadtxt("cost4_mean_file.txt", dtype=float) / cost_scale
centralized_cost_mean = np.loadtxt("centralized_cost_mean_file.txt", dtype=float) / cost_scale
# Flat reference series.  They are sized from `iterations` instead of a
# literal 51 so they always match the shared x-axis they are plotted against.
expert_cost = np.zeros(iterations)
expert_cost_nominal = np.full(iterations, nominal_constraint_violation)
cost_MCE = np.loadtxt("MCE_cost_mean_file.txt", dtype=float) / cost_scale
cost_ME = np.loadtxt("ME_cost_mean_file.txt", dtype=float) / cost_scale

# Standard deviations matching the mean-cost series above.  No active
# statement in the visible script draws them.
cost1_sd = np.loadtxt("cost1_sd_file.txt", dtype=float) / cost_scale
cost2_sd = np.loadtxt("cost2_sd_file.txt", dtype=float) / cost_scale
cost3_sd = np.loadtxt("cost3_sd_file.txt", dtype=float) / cost_scale
cost4_sd = np.loadtxt("cost4_sd_file.txt", dtype=float) / cost_scale
centralized_cost_sd = np.loadtxt("centralized_cost_sd_file.txt", dtype=float) / cost_scale

# False-positive series, plotted unscaled.
false1_positive = np.loadtxt("false1_positive_file.txt", dtype=float)
false2_positive = np.loadtxt("false2_positive_file.txt", dtype=float)
false3_positive = np.loadtxt("false3_positive_file.txt", dtype=float)
false4_positive = np.loadtxt("false4_positive_file.txt", dtype=float)
centralized_false_positive = np.loadtxt("centralized_false_positive_file.txt", dtype=float)
ME_false_positive = np.loadtxt("ME_centralized_false_positive_file.txt", dtype=float)
MCE_false_positive = np.loadtxt("MCE_centralized_false_positive_file.txt", dtype=float)

# False-negative series, plotted unscaled.
false1_negative = np.loadtxt("false1_negative_file.txt", dtype=float)
false2_negative = np.loadtxt("false2_negative_file.txt", dtype=float)
false3_negative = np.loadtxt("false3_negative_file.txt", dtype=float)
false4_negative = np.loadtxt("false4_negative_file.txt", dtype=float)
centralized_false_negative = np.loadtxt("centralized_false_negative_file.txt", dtype=float)
ME_false_negative = np.loadtxt("ME_centralized_false_negative_file.txt", dtype=float)
MCE_false_negative = np.loadtxt("MCE_centralized_false_negative_file.txt", dtype=float)



# Constraint-violation figure: every loaded cost series plus two flat
# reference series, drawn against the outer-iteration index and written to
# 'constraint_violation.pdf'.  No legend or inset is drawn.
plt.rcParams.update({'font.size': 14})
axis = np.arange(0, iterations)
subaxis = np.arange(0, 3)  # not used by any active statement in the visible script
fig, ax = plt.subplots()

# No colours are given here, so each plot call takes the next colour of the
# active property cycle; the drawing order below therefore fixes the colours.
for series, line_style in (
    (cost1_mean, '-'),
    (cost2_mean, '--'),
    (cost3_mean, '-.'),
    (cost4_mean, ':'),
):
    ax.plot(axis, series, line_style)
ax.plot(axis, centralized_cost_mean, '.', alpha=0.5)
for series, marker_style in (
    (expert_cost, '^'),
    (expert_cost_nominal, 'v'),
    (cost_MCE, '<-'),
    (cost_ME, '>-'),
):
    ax.plot(axis, series, marker_style, alpha=1.0, markersize=3)

ax.set_xticks(np.arange(0, iterations, 10))
ax.set_xlabel('Outer iterations')
# Save before show(): the figure is written while it still holds the drawn content.
fig.savefig('constraint_violation.pdf')
plt.show()

 
# False-positive figure: the seven false-positive series against the
# outer-iteration index, written to 'false_positive.pdf'.  No legend or
# inset is drawn.
fig, ax = plt.subplots()
for series, line_style in (
    (false1_positive, '-'),
    (false2_positive, '--'),
    (false3_positive, '-.'),
    (false4_positive, ':'),
):
    ax.plot(axis, series, line_style)
ax.plot(axis, centralized_false_positive, '.', alpha=0.5)
# The MCE and ME curves get explicit colours.
# NOTE(review): presumably chosen to match the colours these two methods
# receive in the constraint-violation figure, where two extra reference
# series are drawn before them -- confirm.
ax.plot(axis, MCE_false_positive, '<-', color='tab:gray', alpha=1.0, markersize=3)
ax.plot(axis, ME_false_positive, '>-', color='tab:olive', alpha=1.0, markersize=3)

ax.set_xticks(np.arange(0, iterations, 10))
ax.set_xlabel('Outer iterations')
fig.savefig('false_positive.pdf')
plt.show()


# False-negative figure: the seven false-negative series against the
# outer-iteration index, written to 'false_negative.pdf'.  No legend or
# inset is drawn.
fig, ax = plt.subplots()
for series, line_style in (
    (false1_negative, '-'),
    (false2_negative, '--'),
    (false3_negative, '-.'),
    (false4_negative, ':'),
):
    ax.plot(axis, series, line_style)
ax.plot(axis, centralized_false_negative, '.', alpha=0.5)
# The MCE and ME curves get explicit colours.
# NOTE(review): presumably chosen to match the colours these two methods
# receive in the constraint-violation figure, where two extra reference
# series are drawn before them -- confirm.
ax.plot(axis, MCE_false_negative, '<-', color='tab:gray', alpha=1.0, markersize=3)
ax.plot(axis, ME_false_negative, '>-', color='tab:olive', alpha=1.0, markersize=3)

ax.set_xticks(np.arange(0, iterations, 10))
ax.set_xlabel('Outer iterations')
fig.savefig('false_negative.pdf')
plt.show()






















